#coding:utf8
from email.header import Header

from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StringType, IntegerType

# DataFrame construction, approach 1: describe the table layout with a
# StructType and apply that schema to an RDD of tuples.
if __name__ == '__main__':
    # Build the SparkSession, the entry point of the execution environment.
    spark = (
        SparkSession.builder
        .appName("test")
        .master("local[*]")
        .getOrCreate()
    )

    try:
        sc = spark.sparkContext

        # Split every input line on "," and turn it into a
        # (first field, second field cast to int) tuple.
        rdd = (
            sc.textFile("../data/input/sql/people.txt")
            .map(lambda s: s.split(","))
            .map(lambda x: (x[0], int(x[1])))
        )

        # Schema description object: two non-nullable columns, a string
        # 'name' and an integer 'age', matching the tuple order above.
        schema = (
            StructType()
            .add('name', StringType(), nullable=False)
            .add('age', IntegerType(), nullable=False)
        )

        # Build the DataFrame from the RDD using the StructType schema.
        df = spark.createDataFrame(rdd, schema)

        df.printSchema()
        df.show()
    finally:
        # Stop the session even when the job above raises, so the
        # underlying SparkContext is always shut down explicitly.
        spark.stop()